library(DT)
library(rcartocolor)
library(rnaturalearth)
library(gtsummary)
library(ltm)
library(tidyverse)
library(GGally)
library(broom)
library(ggplot2)
theme_set(theme_bw())
The questions in our survey are as follows:
source("utils.R")
valid_data <- read_csv("data/processed_data.csv")
questions <- read_csv("data/questions.csv")
datatable(
questions,
options = list(pageLength = 100, scrollY = "400px"),
rownames = FALSE
)
The demographic characteristics of the participants including gender, age, and country of residence are summarized in Table 1.
demo_data <- valid_data |>
select(Gender, Age, Country, `Financial stability`)
table1 <- demo_data |>
# lumping countries with fewer than 4 responses as "Other"
mutate(Country = fct_lump_min(Country, 5)) |>
tbl_summary() |>
modify_caption("**Table 1. Respondent Characteristics**")
table1
| Characteristic | N = 1351 |
|---|---|
| Gender | |
| Female | 97 (72%) |
| Male | 34 (25%) |
| Prefer not to say | 4 (3.0%) |
| Age | 25 (21, 30) |
| Country | |
| Belgium | 27 (20%) |
| China | 8 (6.1%) |
| Ireland | 7 (5.3%) |
| United States of America | 5 (3.8%) |
| Vietnam | 47 (36%) |
| Other | 38 (29%) |
| Unknown | 3 |
| Financial stability | 6.00 (5.00, 7.00) |
| 1 n (%); Median (IQR) | |
# use map data from rnaturalearth to draw map
my_world <- rnaturalearth::ne_countries(scale = "medium", returnclass = "sf") |>
select(-geometry) |>
rename("Country" = name) |>
full_join(count(demo_data, Country), by = "Country")
continent_data <- my_world |>
select(Country, continent) |>
as_tibble() |>
select(-geometry) |>
mutate(continent = fct_collapse(continent, "America" = c("North America", "South America")))
my_world |>
ggplot() +
geom_sf(aes(fill = n)) +
coord_sf(crs = "+proj=eqearth +wktext") +
scale_fill_carto_c(palette = "Sunset") +
theme(
panel.background = element_rect(fill = "azure"),
panel.grid = element_line(color = "#ebebeb"),
legend.position = c(0.94, 0.75),
legend.background = element_blank(),
panel.border = element_rect(fill = NA),
)
age_breaks <- c(-4, 0, 4, 8)
demo_data |>
filter(Gender %in% c("Male", "Female")) |>
count(Age, Gender) |>
mutate(n = case_when(Gender == "Male" ~ -n, .default = n)) |>
ggplot() +
aes(x = Age, y = n, fill = Gender) +
geom_col() +
scale_fill_carto_d(palette = "ArmyRose", direction = -1) +
scale_y_continuous(breaks = age_breaks, labels = abs(age_breaks)) +
labs(y = NULL, subtitle = "Response counts by gender and age") +
guides(fill = guide_legend(position = "inside")) +
theme(legend.position.inside = c(0.8, 0.8))
The majority of our respondents are female and in their 20s.
demo_data |>
drop_na(`Financial stability`) |>
ggplot() +
aes(
x = `Financial stability`,
fill = `Financial stability`,
group = `Financial stability`
) +
geom_bar() +
scale_fill_carto_c(guide = "none", palette = "Teal") +
scale_x_continuous(breaks = seq(2, 10, 2)) +
labs(y = NULL, subtitle = "Financial stability distribution")
How consistent are the responses to the items in the Q1 and Q6 construct?
q1 <- valid_data |>
select(starts_with("q1_"))
al1 <- cronbach.alpha(q1, CI = TRUE, standardized = TRUE)
al1
##
## Standardized Cronbach's alpha for the 'q1' data-set
##
## Items: 4
## Sample units: 135
## alpha: 0.925
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.896 0.945
A Cronbach’s alpha of 0.925 (95% CI: 0.896, 0.945) indicates a very high level of internal consistency among the items in the scale, suggesting that they are all reliably measuring the same underlying construct of fiber awareness before purchase.
q6 <- valid_data |>
select(starts_with("q6_"))
al6 <- cronbach.alpha(q6, CI = TRUE, standardized = TRUE)
al6
##
## Standardized Cronbach's alpha for the 'q6' data-set
##
## Items: 5
## Sample units: 135
## alpha: 0.845
##
## Bootstrap 95% CI based on 1000 samples
## 2.5% 97.5%
## 0.794 0.881
A Cronbach’s alpha of 0.845 (95% CI: 0.794, 0.881) indicates a high level of internal consistency among the items in the scale, suggesting that they are all reliably measuring the same underlying construct of fashion involvement.
Opinions and likelihood of a purchase regarding fiber types
q2 <- valid_data |>
select(starts_with("q2")) |>
rename_with(~ gsub("q2_sweater_", "", .x)) |>
mutate(id = row_number()) |>
pivot_longer(-id) |>
mutate(name = fct_relevel(name, tolower(sorted_fibers)))
q2 |>
ggplot() +
aes(y = value, x = name, fill = name, color = name) +
geom_violin(alpha = 0.2, adjust = 1.5) +
geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
scale_fill_carto_d(guide = "none") +
scale_color_carto_d(guide = "none") +
scale_y_continuous(breaks = 1:7, labels = sorted_likeliness) +
labs(
y = NULL, x = NULL,
subtitle = "How likely is it that you will buy a sweater made of ..."
)
Observations: …
q3 <- valid_data |>
select(starts_with("q3")) |>
rename_with(~ gsub("q3_impact_", "", .x)) |>
mutate(id = row_number()) |>
pivot_longer(-id) |>
mutate(name = fct_relevel(name, sorted_fibers))
q3 |>
ggplot() +
aes(y = value, x = name, fill = name, color = name) +
geom_violin(alpha = 0.2, adjust = 2) +
geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
scale_fill_carto_d(guide = "none") +
scale_color_carto_d(guide = "none") +
scale_y_continuous(
breaks = 1:7,
labels = sorted_impact
) +
labs(y = NULL, x = NULL, subtitle = "How significant is the environmental impact of producing...")
q4_wide <- valid_data |>
select(starts_with("q4")) |>
rename_with(~ gsub("q4_opinion_", "", .x))
q4 <- q4_wide |>
mutate(id = row_number()) |>
pivot_longer(-id) |>
mutate(name = fct_relevel(name, sorted_fibers))
q4 |>
ggplot() +
aes(y = value, x = name, fill = name, color = name) +
geom_violin(alpha = 0.2, adjust = 2) +
geom_jitter(alpha = 0.4, width = 0.2, height = 0.2) +
scale_fill_carto_d(guide = "none") +
scale_color_carto_d(guide = "none") +
scale_y_continuous(
breaks = 1:7,
labels = sorted_like
) +
labs(y = NULL, x = NULL, subtitle = "What is your opinion of these fibers?")
Some observations:
Question: Are these rankings associated with each other? e.g. does someone who likes cashmere also like cotton and linen?
We look at a few correlation tests:
ggpairs(q4_wide)
There is statistically significant positive correlation in the rankings between:
Hmm… maybe this is not the most interesting result…
q5 <- valid_data |>
select(starts_with("q5_knowledge_")) |>
rename_with(~ gsub("q5_knowledge_", "", .x)) |>
mutate(id = row_number()) |>
pivot_longer(-id) |>
mutate(
value = fct_relevel(
value,
c("I do not know", "Produced synthetically", "Plant-based", "Animal-based")
),
name = fct_relevel(name, sorted_fibers)
)
q5 |>
count(name, value) |>
mutate(color = if_else(n > 60, "white", "black")) |>
ggplot() +
aes(x = name, y = value) +
geom_tile(aes(fill = n), color = "white") +
geom_text(aes(label = n, color = color)) +
scale_color_identity() +
scale_fill_carto_c(palette = "Teal") +
labs(x = NULL, y = NULL) +
theme_minimal() +
theme(panel.grid.major = element_blank())
q6_columns <- c(
"I like fashion.",
"I often buy clothes in general.",
"I read fashion news regularly.",
"I try to keep my wardrobe\nup-to-date with fashion trends.",
"I like to shop for clothes."
)
q6 <- valid_data |>
select(starts_with("q6"), Country) |>
left_join(continent_data, by = "Country") |>
select(-Country) |>
`colnames<-`(c(q6_columns, "continent")) |>
mutate(id = row_number()) |>
pivot_longer(-c(id, continent)) |>
drop_na(continent)
q6 |>
ggplot() +
aes(x = value, y = continent, fill = continent, color = continent) +
facet_wrap(~name, ncol = 5) +
geom_violin(alpha = 0.2, adjust = 1.5) +
geom_jitter(alpha = 0.4, width = 0.1, height = 0.1) +
scale_fill_carto_d(guide = "none", palette = "Safe") +
scale_color_carto_d(guide = "none", palette = "Safe") +
labs(y = NULL, x = NULL)
Purchase intention ~ perceived sustainability + attitude/opinion + knowledge + avg_involvement + avg_awareness
lm_results <- vector("list")
for (type in sorted_fibers) {
# type can be "Cashmere", "Polyester", etc.
dat <- valid_data |>
select(ends_with(tolower(type)), starts_with("avg_"), -contains("knowledge")) |>
rename_all(~ gsub(type, "", .x, ignore.case = TRUE))
fit <- lm(q2_sweater_ ~ ., dat)
lm_results[[type]] <- summary(fit)$coefficients[, "Pr(>|t|)"] |>
c(glance(fit) |>
select(df, df.residual, r.squared, adj.r.squared, f.value = statistic, p.value))
# enframe() |> unnest(value) |> column_to_rownames("name") |>
# `colnames<-`(type)
}
regression_table <- bind_rows(lm_results) |>
mutate(fiber_type = names(lm_results), .before = 1)
regression_table |>
rowwise() |>
mutate(
across(c(2:7, "p.value"), \(x) if (x < 0.001) "<0.001" else as.character(round(x, 3))),
across(r.squared:f.value, \(x) round(x, 3))
) |>
# datatable(
# options = list(scrollX = 700, dom = "t"),
# rownames = FALSE
# ) |>
# formatSignif(columns = 10:12, digits = 3) |>
# formatStyle(
# names(regression_table),
# backgroundColor = styleInterval(c(0.05), c("#f3e79b", NA)),
# ) |>
# formatStyle(
# names(regression_table),
# backgroundColor = styleEqual("<0.001", "#f3e79b"),
# ) |>
write_csv("data/regression_table.csv")
score_df <- valid_data |>
select(-starts_with("q")) |>
left_join(continent_data, by = "Country") |>
select(-Country)
involve_fit <- summary(lm(
avg_involvement ~ continent + Age + Gender + `Financial stability`,
data = score_df
))
involve_fit
##
## Call:
## lm(formula = avg_involvement ~ continent + Age + Gender + `Financial stability`,
## data = score_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2043 -0.8184 0.0337 0.9126 2.5428
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.702597 0.488831 11.666 < 2e-16 ***
## continentEurope 0.073462 0.230179 0.319 0.75015
## continentAmerica -0.242223 0.504574 -0.480 0.63203
## Age -0.038874 0.013816 -2.814 0.00569 **
## GenderMale -1.410249 0.259602 -5.432 2.79e-07 ***
## GenderPrefer not to say -1.548130 0.647124 -2.392 0.01823 *
## `Financial stability` -0.007761 0.060689 -0.128 0.89844
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.265 on 125 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.275, Adjusted R-squared: 0.2402
## F-statistic: 7.901 on 6 and 125 DF, p-value: 3.148e-07
Holding all other variables constant, for each year older, a person’s average score of involvement in fashion is estimated to decrease by approximately 0.04082 (P value = 0.00384). Also, on average, male respondents are estimated to have 1.46 lower involvement score compared to female respondents (P value < 0.001). Geography and financial stability do not have a significant association with a person’s average involvement in fashion.
aware_fit <- summary(lm(
avg_awareness ~ continent + Age + Gender + `Financial stability`,
data = score_df
))
aware_fit
##
## Call:
## lm(formula = avg_awareness ~ continent + Age + Gender + `Financial stability`,
## data = score_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0741 -1.1603 0.3172 1.1457 2.5940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.73533 0.61167 6.107 1.19e-08 ***
## continentEurope 0.22479 0.28802 0.780 0.4366
## continentAmerica 0.42646 0.63137 0.675 0.5006
## Age 0.04308 0.01729 2.492 0.0140 *
## GenderMale -0.70151 0.32484 -2.160 0.0327 *
## GenderPrefer not to say -0.11411 0.80974 -0.141 0.8882
## `Financial stability` 0.00547 0.07594 0.072 0.9427
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.583 on 125 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.07772, Adjusted R-squared: 0.03345
## F-statistic: 1.756 on 6 and 125 DF, p-value: 0.1136
Age has a significant positive association with fiber awareness preference before purchase. A person’s average score for fiber awareness is estimated to increase by approximately 0.045 for each year increase in age (P = 0.01402). Other variables such as gender, geography, and financial stability do not have a significant association with fiber awareness.
score_df |>
filter(Gender %in% c("Male", "Female")) |>
ggplot() +
aes(x = Age, y = avg_involvement) +
geom_smooth(
aes(color = Gender),
fill = "grey80",
method = "lm",
formula = "y~x"
) +
geom_jitter(height = 0.05, width = 0.05, alpha = 0.6) +
facet_wrap(~Gender) +
scale_color_carto_d(palette = "ArmyRose", direction = -1, guide = "none") +
labs(
y = "Average involvement score",
title = "Fashion involvement",
subtitle = "\"Do you like fashion?\""
)
score_df |>
filter(Gender %in% c("Male", "Female")) |>
ggplot() +
aes(x = Age, y = avg_awareness) +
geom_smooth(
aes(color = Gender),
fill = "grey80",
method = "lm",
formula = "y~x"
) +
geom_jitter(height = 0.05, width = 0.05, alpha = 0.6) +
facet_wrap(~Gender) +
scale_color_carto_d(palette = "ArmyRose", direction = -1, guide = "none") +
labs(
y = "Average awareness score",
title = "Fiber awareness preference before purchase",
subtitle = "\"Before making a purchase, I like to know the fiber type of the clothing item.\""
)